Read Oceano Bottle data

# packages
if (!require("librarian")){
  install.packages("librarian")
  library(librarian)
}
librarian::shelf(
  dplyr, DT, dygraphs, glue, gstat, here, lubridate, mapview, purrr, readr, 
  raster, rmapshaper, sf, skimr, stars, stringr, tidyr)
select <- dplyr::select

# paths
dir_data <- switch(
  Sys.info()["nodename"],
  `ben-mbpro` = "/Users/bbest/My Drive (ben@ecoquants.com)/projects/calcofi/data",
  `Cristinas-MacBook-Pro.local` = "/Volumes/GoogleDrive/.shortcut-targets-by-id/13pWB5x59WSBR0mr9jJjkx7rri9hlUsMv/calcofi/data")
  # TODO: get Erin's Google Drive path and "nodename")
bottle_csv <- file.path(dir_data, "/oceanographic-data/bottle-database/CalCOFI_Database_194903-202001_csv_22Sep2021/194903-202001_Bottle.csv")
cast_csv <- file.path(dir_data, "/oceanographic-data/bottle-database/CalCOFI_Database_194903-202001_csv_22Sep2021/194903-202001_Cast.csv")
calcofi_geo <- here("data/calcofi_oceano-bottle-stations_convex-hull.geojson")
calcofi_offshore_geo <- here("data/calcofi_oceano-bottle-stations_convex-hull_offshore.geojson")
calcofi_nearshore_geo <- here("data/calcofi_oceano-bottle-stations_convex-hull_nearshore.geojson")

# check paths
stopifnot(dir.exists(dir_data))
stopifnot(any(file.exists(bottle_csv,cast_csv)))

# read data
d_bottle <- read_csv(bottle_csv, skip=1, col_names = F)
names(d_bottle) <- str_split(
  readLines(bottle_csv, n=1), ",")[[1]] %>% 
  str_replace("\xb5", "µ")
d_cast   <- read_csv(cast_csv)

# join data
d <- d_cast %>% 
  left_join(
    d_bottle %>% select(-Sta_ID),
    by = "Cst_Cnt") %>% 
  mutate(Date = lubridate::as_date(Date, format = "%m/%d/%Y"))

Exploratory summary of bottle data

# d_cast
skim(d_cast)
# d_bottle
skim(d_bottle)

Get CINMS AOI

# get example AOI (Channel Islands NMS)
sanctuaries_geo <- "https://github.com/noaa-onms/onmsR/raw/12a87dfd4b90f2e3009ccb4913315fb2df7afddc/data-raw/sanctuaries.geojson"

cinms_ply <- sf::st_read(sanctuaries_geo) %>%
  dplyr::filter(nms == "CINMS")
## Reading layer `sanctuaries' from data source 
##   `https://github.com/noaa-onms/onmsR/raw/12a87dfd4b90f2e3009ccb4913315fb2df7afddc/data-raw/sanctuaries.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 16 features and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -180 ymin: -15.38615 xmax: 180 ymax: 48.50589
## Geodetic CRS:  WGS 84
# get AOI geom points as WKT for later use with API
cinms_txt <- sf::st_as_text(cinms_ply$geometry)
#cinms_txt

mapview(cinms_ply)

Get Station IDs as points

# for summary, want to group by Sta_Code because each data point has a diff Sta_ID
pts <- d %>% 
  filter(
    !is.na(Lat_Dec),
    !is.na(Lon_Dec)) %>% 
  group_by(
    Sta_ID) %>%
  summarize(
    lon      = mean(Lon_Dec),
    lat      = mean(Lat_Dec)) %>%
  st_as_sf(
    coords = c("lon", "lat"), crs=4326, remove = F) %>% 
  separate(
    Sta_ID, c("Sta_ID_Line", "Sta_ID_Station"), 
    sep=" ", remove=F) %>% 
  mutate(
    Sta_ID_Line    = as.double(Sta_ID_Line),
    Sta_ID_Station = as.double(Sta_ID_Station),
    offshore       = ifelse(Sta_ID_Station > 60, T, F))
mapview(pts, zcol="offshore")
table(pts$offshore)
## 
## FALSE  TRUE 
##  1534  1100

Make CalCOFI total study areas

Per guidance from Erin:

CalCOFI Regions– Station # > 60 = oceanic/offshore; <60 = neritic/nearshore/continental shelf (other regions include those from Venrick et al. 2012 or Stephens et al. 2018

make_hull <- function(x){
  st_union(x) %>% 
  st_convex_hull() %>% 
  st_buffer(0.1) %>%  # ~10 km
  ms_simplify(keep = 0.05) }

hull_geos <- c(calcofi_geo, calcofi_nearshore_geo, calcofi_offshore_geo)
if (any(!file.exists(hull_geos))){

  hull <- pts %>% 
    make_hull()
  st_write(hull, calcofi_geo)
  mapview(hull)
  
  hull_nearshore <- pts %>% 
    filter(!offshore) %>% 
    make_hull()
  st_write(hull_nearshore, calcofi_nearshore_geo)
  mapview(hull_nearshore)
  
  hull_offshore <- pts %>% 
    filter(offshore) %>% 
    make_hull()
  st_write(hull_offshore, calcofi_offshore_geo)
  mapview(hull_offshore)
}

Get latest variable for AOI

aoi = cinms_ply

# find stations in aoi
pts_aoi <- pts %>% 
  mutate(
    x = st_intersects(pts, aoi) %>% as.logical()) %>% 
  filter(x)
  
mapview(aoi) +
  mapview(pts_aoi)
get_oceano_var_daily_aoi <- function(var, aoi, depth_min=0, depth_max=10){

  # find stations in aoi
  pts_aoi <- pts %>% 
    mutate(
      x = st_intersects(pts, aoi) %>% as.logical()) %>% 
    filter(x)
  
  d_aoi <- d %>% 
    filter(Sta_ID %in% pts_aoi$Sta_ID)
  
  d_aoi_daily <- d_aoi %>% 
    filter(
      !is.na(.data[[var]]),
      Depthm >= depth_min,
      Depthm < depth_max) %>% 
    group_by(Date) %>% 
    summarize(
      var_n   = n(),
      var_min = min(.data[[var]], na.rm = T),
      var_q10 = quantile(.data[[var]], probs = 0.10, na.rm = T),
      var_avg = mean(.data[[var]], na.rm = T),
      var_q90 = quantile(.data[[var]], 0.90, na.rm = T),
      var_max = max(.data[[var]], na.rm = T),
      var_sd  = sd(.data[[var]], na.rm = T))
}

v <- get_oceano_var_daily_aoi("T_degC", cinms_ply, 0, 10)
datatable(v)

Plot time series

# TODO: make this a function for any variable, 
#         with a schema to apply for labels and colors
# see: [plot_metric_timeseries()](https://github.com/noaa-onms/onmsR/blob/2e438a9bdff8ee90b8fc811aafae4520c26049ab/R/spatial.R#L16-L49)

x <- v %>% select(
  Date, 
  `10% quantile` = var_q10,
  `average`      = var_avg,
  `90% quantile` = var_q90)
y <- xts::xts(x = x %>% select(-Date), order.by = x %>% pull(Date))

dygraph(
  y,
  main = "Sea Surface Temperature",
  xlab = "Date", ylab = "Temperature (°C)") %>% # ...) %>%
  dySeries(
    c("10% quantile", "average", "90% quantile"), 
    label = "Temperature (°C)", color = "Red") %>%
  dyRangeSelector(fillColor = " #FFFFFF", strokeColor = "#FFFFFF")

Get time series of variable at given step (eg annual) within AOI

Get raster of variable for latest within AOI

var       = "T_degC"
aoi       = cinms_ply
depth_min = 0
depth_max = 10

# find stations in aoi
pts_aoi <- pts %>% 
  mutate(
    x = st_intersects(pts, aoi) %>% as.logical()) %>% 
  filter(x)

d_aoi <- d %>% 
  filter(Sta_ID %in% pts_aoi$Sta_ID)

d_aoi_daily <- d_aoi %>% 
  filter(
    !is.na(.data[[var]]),
    Depthm >= depth_min,
    Depthm < depth_max) %>% 
  group_by(Date, Sta_ID) %>% 
  summarize(
    var_n   = n(),
    var_avg = mean(.data[[var]], na.rm = T), 
    .groups = "drop") %>% 
  arrange(desc(Date))
d_aoi_daily
## # A tibble: 376 × 4
##    Date       Sta_ID      var_n var_avg
##    <date>     <chr>       <int>   <dbl>
##  1 2020-01-15 083.3 051.0     2    14.0
##  2 2019-11-14 083.3 051.0     4    16.7
##  3 2019-07-22 083.3 051.0     5    14.5
##  4 2019-04-09 083.3 051.0     2    12.9
##  5 2019-02-11 083.3 051.0     2    14.1
##  6 2018-10-25 083.3 051.0     2    17.2
##  7 2018-06-20 083.3 051.0     2    15.6
##  8 2018-04-17 083.3 051.0     2    12.5
##  9 2018-02-06 083.3 051.0     3    15.2
## 10 2017-11-17 083.3 051.0     2    16.2
## # … with 366 more rows

Wierd: only ~one station per day.

Let’s look without AOI

get_oceano_var_cruise_raster <- function(cruise_id, var, depth_min, depth_max){
  d_daily <- d %>% 
    filter(
      !is.na(.data[[var]]),
      Depthm >= !!depth_min,
      Depthm < !!depth_max,
      Cruise_ID == !!cruise_id) %>% 
    group_by(Cruise_ID, Date, Sta_ID) %>% 
    summarize(
      var_n   = n(),
      var_avg = mean(.data[[var]], na.rm = T), 
      .groups = "drop") %>% 
    arrange(desc(Date), Cruise_ID)
  d_daily
  
  p <- pts %>% 
    left_join(
      d_daily,
      by="Sta_ID") %>% 
    select(Sta_ID, Date, var_avg) %>% 
    filter(!is.na(var_avg))
  mapview(p, zcol="var_avg")
  
  h <- st_convex_hull(st_union(p)) %>% st_as_sf() %>% 
    mutate(one = 1)
  mapview(h)
  r <- raster(as_Spatial(h), res=0.1, crs=4326)
  z <- rasterize(as_Spatial(h), r, "one")
  
  # inverse distance weighted interpolation
  #   https://rspatial.org/raster/analysis/4-interpolation.html
  gs <- gstat(formula=var_avg~1, locations=p)
  idw <- interpolate(z, gs)
  w <- mask(idw, z)
  
  w_tif <- here("data/_test_idw.tif")
  raster::writeRaster(w, w_tif, overwrite=T)
  w <- raster(w_tif) %>% readAll()
  unlink(w_tif)
  
  w
}

w <- get_oceano_var_cruise_raster(
  cruise_id = "2020-01-05-C-33RL",
  var       = "T_degC", 
  depth_min = 0,
  depth_max = 10)
## [inverse distance weighted interpolation]
mapview(w)

TODO

summarize:

  • OCEAN TEMP: T_degC

  • salinity: Salnty

  • OXYGEN (HYPOXIA)

  • ZOOPLANKTON

  • ICHTHYOPLANKTON for each station ID

  • Create read_bottle() in calcofi4r